Hive 与其他数据库/文件系统之间的数据导入导出
--hive导入oracle
sqoop export --table oracle_tb --columns 'createday,stu_comhw_count_all,stu_sum_all' -connect jdbc:oracle:thin:@10.0.1.156:1521:orcl --username vba --password vba --export-dir /report/r_christmas_hw_day_view --input-fields-terminated-by '\001' --input-lines-terminated-by '\n' --direct
--本地文件导入hive
LOAD DATA LOCAL INPATH 'path' OVERWRITE INTO TABLE tmp.tempt_test;
--从hadoop导入hive
LOAD DATA INPATH 'hdfs_path' OVERWRITE INTO TABLE tmp.tempt_test;
说明:从hadoop(HDFS)导入hive表时(即不带 LOCAL 关键字),hadoop上的文件会被直接移动(而不是复制)到表对应的路径下,导入后源路径下不再保留这些文件;小文件的命名也不会发生变化
--从oracle导入hive
sqoop import --hive-import --connect jdbc:oracle:thin:@10.6.0.89:1521:orcl --username vba --password vba --verbose -m 1 --table KNOWLEDGE_PIONT_TS --hive-database tmp --hive-table r_school_status_201610
-- python 连接mongo
import commands
from pymongo import MongoClient
from ConfigParser import ConfigParser
def getSubContentType(path):
    """Dump every ``sub_content_type_id`` stored in MongoDB to a text file.

    Reads user, password and host from the ``mongodb-online`` section of
    ``/opt/develop/homework/conf/mongo.cfg``, iterates over all documents of
    the ``venus_sub_content_type`` collection in the ``vox-question``
    database, prints each ``sub_content_type_id`` and writes it on its own
    line to ``<path>/sub_content_type_ids.txt`` (the file is truncated first).

    Args:
        path: Directory in which ``sub_content_type_ids.txt`` is written.

    Returns:
        A tuple holding the ``str()`` form of every id, in cursor order.
    """
    # MongoDB connection settings.
    config_path = '/opt/develop/homework/conf/mongo.cfg'
    config = ConfigParser()
    # Close the config file handle as soon as it has been parsed.
    with open(config_path) as config_file:
        config.readfp(config_file)
    user_name = config.get('mongodb-online', 'user')
    password = config.get('mongodb-online', 'pw')
    ip = config.get('mongodb-online', 'ip')
    # NOTE(review): user name and password are interpolated into the URI
    # verbatim; pymongo expects reserved characters such as '@', ':' or '/'
    # to be percent-escaped -- confirm the configured values are URI-safe.
    mongo_conn = 'mongodb://{un}:{pw}@{ip}/?authSource={auth}'.format(
        un=user_name, pw=password, ip=ip, auth="admin")

    # Connect to the MongoDB collection and export the ids.
    print('可用的题目类型sub_content_type_id:')
    collected_ids = []
    # The ``with`` block guarantees the output file is flushed and closed,
    # and ``finally`` releases the client's sockets even if iteration fails.
    with open(path + "/sub_content_type_ids.txt", "w") as output:
        client = MongoClient(mongo_conn)
        try:
            posts = client['vox-question'].venus_sub_content_type
            for line in posts.find():
                sub_content_type_id = line["sub_content_type_id"]
                print('------ %s' % (sub_content_type_id,))
                id_text = str(sub_content_type_id)
                collected_ids.append(id_text)
                output.write(id_text + "\n")
        finally:
            client.close()
    # Callers receive a tuple, as before; a list is only used while
    # accumulating to avoid re-copying the tuple on every iteration.
    return tuple(collected_ids)
-- python连接mysql
import commands
import smtplib
# Prefix of the shell command used to run a single statement through the
# mysql command-line client; the quoted SQL text is appended right after
# the trailing "-e ".
# NOTE(review): host, user and password are hard-coded here -- consider
# moving them to a protected config file.
online_sql_conn = (
    "mysql -h10.0.1.54 -P5001 "
    "-udata.athena.2016 -pdata.DATA.17.PWD.ld "
    "-e "
)
def get_database_table_line_num(online_sql_conn, select_sql):
    """Run a SQL statement through a shell command and return its first row.

    The two arguments are concatenated into one command line, which is
    echoed to stdout and then executed with ``commands.getstatusoutput``.

    Args:
        online_sql_conn: Command prefix (for example ``mysql ... -e ``).
        select_sql: Shell-quoted SQL text appended to the prefix.

    Returns:
        The second line of the command output as a string (for the mysql
        client this is the first data row, the line after the column
        header), or ``-1`` when the command exits with a non-zero status or
        prints fewer than two lines.
    """
    command = online_sql_conn + select_sql
    print("online_sql_conn + select_sql:\t" + command)
    status, output = commands.getstatusoutput(command)
    if status != 0:
        return -1
    output_lines = output.split('\n')
    # A successful command can still print nothing (or only one line);
    # report that with the same sentinel instead of raising IndexError.
    # NOTE(review): getstatusoutput merges stderr into the output, so a
    # client warning printed before the header would shift the rows --
    # confirm the client in use prints none.
    if len(output_lines) < 2:
        return -1
    return output_lines[1]
# Count the rows of HS_Athena_Model.mdl_teacher_auth_score for the `dt`
# partition named by `yesterday` with auth_status = 0 and predict = 1.
# The statement is wrapped in double quotes because it is appended to a
# shell command line.
select_sql = (
    '"select count(1) from HS_Athena_Model.mdl_teacher_auth_score'
    ' where auth_status = 0 and predict = 1 and dt = \''
    + yesterday
    + '\';"'
)
table_line_num = get_database_table_line_num(online_sql_conn, select_sql)